#Current health expenditure (% of GDP) (Source: https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS)
wb_data <- read_csv("data/healthcare-spending/world_bank_data.csv") %>%
clean_names()
skimr::skim(wb_data)
| Name | wb_data |
| Number of rows | 266 |
| Number of columns | 69 |
| _______________________ | |
| Column type frequency: | |
| character | 7 |
| logical | 42 |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| country_code | 0 | 1.00 | 3 | 3 | 0 | 266 | 0 |
| country_name | 0 | 1.00 | 4 | 52 | 0 | 266 | 0 |
| indicator_name | 0 | 1.00 | 37 | 37 | 0 | 1 | 0 |
| indicator_code | 0 | 1.00 | 17 | 17 | 0 | 1 | 0 |
| region | 49 | 0.82 | 10 | 26 | 0 | 7 | 0 |
| income_group | 50 | 0.81 | 10 | 19 | 0 | 4 | 0 |
| special_notes | 140 | 0.47 | 16 | 1177 | 0 | 111 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| x1960 | 266 | 0 | NaN | : |
| x1961 | 266 | 0 | NaN | : |
| x1962 | 266 | 0 | NaN | : |
| x1963 | 266 | 0 | NaN | : |
| x1964 | 266 | 0 | NaN | : |
| x1965 | 266 | 0 | NaN | : |
| x1966 | 266 | 0 | NaN | : |
| x1967 | 266 | 0 | NaN | : |
| x1968 | 266 | 0 | NaN | : |
| x1969 | 266 | 0 | NaN | : |
| x1970 | 266 | 0 | NaN | : |
| x1971 | 266 | 0 | NaN | : |
| x1972 | 266 | 0 | NaN | : |
| x1973 | 266 | 0 | NaN | : |
| x1974 | 266 | 0 | NaN | : |
| x1975 | 266 | 0 | NaN | : |
| x1976 | 266 | 0 | NaN | : |
| x1977 | 266 | 0 | NaN | : |
| x1978 | 266 | 0 | NaN | : |
| x1979 | 266 | 0 | NaN | : |
| x1980 | 266 | 0 | NaN | : |
| x1981 | 266 | 0 | NaN | : |
| x1982 | 266 | 0 | NaN | : |
| x1983 | 266 | 0 | NaN | : |
| x1984 | 266 | 0 | NaN | : |
| x1985 | 266 | 0 | NaN | : |
| x1986 | 266 | 0 | NaN | : |
| x1987 | 266 | 0 | NaN | : |
| x1988 | 266 | 0 | NaN | : |
| x1989 | 266 | 0 | NaN | : |
| x1990 | 266 | 0 | NaN | : |
| x1991 | 266 | 0 | NaN | : |
| x1992 | 266 | 0 | NaN | : |
| x1993 | 266 | 0 | NaN | : |
| x1994 | 266 | 0 | NaN | : |
| x1995 | 266 | 0 | NaN | : |
| x1996 | 266 | 0 | NaN | : |
| x1997 | 266 | 0 | NaN | : |
| x1998 | 266 | 0 | NaN | : |
| x1999 | 266 | 0 | NaN | : |
| x2020 | 266 | 0 | NaN | : |
| x2021 | 266 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| x2000 | 34 | 0.87 | 5.60 | 2.73 | 1.51 | 3.97 | 5.04 | 6.79 | 24.24 | ▇▃▁▁▁ |
| x2001 | 34 | 0.87 | 5.72 | 2.52 | 1.76 | 4.06 | 5.21 | 6.99 | 19.19 | ▇▆▁▁▁ |
| x2002 | 33 | 0.88 | 5.81 | 2.41 | 1.70 | 4.17 | 5.15 | 7.13 | 14.38 | ▅▇▃▁▁ |
| x2003 | 31 | 0.88 | 5.90 | 2.41 | 1.92 | 4.19 | 5.36 | 7.18 | 15.42 | ▆▇▃▁▁ |
| x2004 | 31 | 0.88 | 5.95 | 2.46 | 1.66 | 4.29 | 5.34 | 7.41 | 17.26 | ▇▇▃▁▁ |
| x2005 | 31 | 0.88 | 5.99 | 2.71 | 1.50 | 4.12 | 5.28 | 7.65 | 21.96 | ▇▆▁▁▁ |
| x2006 | 31 | 0.88 | 5.95 | 2.65 | 1.52 | 4.18 | 5.29 | 7.72 | 22.02 | ▇▆▁▁▁ |
| x2007 | 31 | 0.88 | 5.96 | 2.80 | 1.57 | 4.12 | 5.29 | 7.57 | 24.23 | ▇▅▁▁▁ |
| x2008 | 31 | 0.88 | 6.01 | 2.85 | 1.64 | 4.00 | 5.30 | 7.72 | 23.00 | ▇▅▁▁▁ |
| x2009 | 31 | 0.88 | 6.38 | 2.78 | 1.84 | 4.34 | 5.57 | 8.06 | 16.43 | ▆▇▅▁▁ |
| x2010 | 30 | 0.89 | 6.27 | 2.79 | 1.82 | 4.25 | 5.47 | 8.06 | 16.88 | ▆▇▅▁▁ |
| x2011 | 29 | 0.89 | 6.20 | 2.76 | 1.55 | 4.31 | 5.47 | 7.97 | 16.20 | ▆▇▅▁▁ |
| x2012 | 30 | 0.89 | 6.21 | 2.70 | 1.26 | 4.27 | 5.52 | 7.91 | 16.18 | ▅▇▃▁▁ |
| x2013 | 31 | 0.88 | 6.33 | 2.76 | 1.88 | 4.43 | 5.57 | 7.96 | 18.60 | ▇▇▃▁▁ |
| x2014 | 31 | 0.88 | 6.38 | 2.82 | 1.91 | 4.45 | 5.50 | 7.91 | 19.73 | ▇▆▂▁▁ |
| x2015 | 31 | 0.88 | 6.50 | 2.89 | 1.82 | 4.60 | 5.72 | 7.83 | 20.41 | ▇▆▂▁▁ |
| x2016 | 32 | 0.88 | 6.52 | 2.84 | 1.63 | 4.56 | 5.82 | 7.89 | 17.37 | ▅▇▃▁▁ |
| x2017 | 31 | 0.88 | 6.49 | 2.78 | 1.64 | 4.61 | 6.03 | 8.04 | 16.81 | ▅▇▃▁▁ |
| x2018 | 31 | 0.88 | 6.46 | 2.85 | 1.60 | 4.47 | 5.87 | 8.03 | 19.04 | ▇▇▃▁▁ |
| x2019 | 32 | 0.88 | 6.53 | 2.99 | 1.53 | 4.48 | 5.85 | 7.94 | 23.96 | ▇▆▁▁▁ |
wb_data <- wb_data %>%
remove_empty(which=c("rows", "cols"), quiet=FALSE)
# get time series data
wb2 <- wb_data %>%
pivot_longer(cols=c("x2000", "x2001", "x2002", "x2003", "x2004", "x2005", "x2006", "x2007", "x2008", "x2009", "x2010", "x2011", "x2012", "x2013", "x2014", "x2015", "x2016", "x2017", "x2018", "x2019"),
names_to="Year",
values_to="Spending") %>%
mutate(Year = as.integer(substr(Year, 2, 5)))
head(wb2)
## # A tibble: 6 × 9
## country_code country_name indic…¹ indic…² region incom…³ speci…⁴ Year Spend…⁵
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <int> <dbl>
## 1 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2000 NA
## 2 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2001 NA
## 3 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2002 NA
## 4 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2003 NA
## 5 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2004 NA
## 6 ABW Aruba Curren… SH.XPD… Latin… High i… <NA> 2005 NA
## # … with abbreviated variable names ¹indicator_name, ²indicator_code,
## # ³income_group, ⁴special_notes, ⁵Spending
eu <- c("AUT", "GBR", "BEL", "BGR", "CYP", "HRV", "CZE", "DNK", "EST", "FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA", "LVA", "LTU", "LUX", "MLT", "NLD", "POL", "PRT", "ROM", "SVK", "SVN", "ESP", "SWE")
wb2 %>%
filter(country_code %in% eu) %>%
mutate(is_gbr = ifelse(country_code=="GBR", 1, 0)) %>%
group_by(is_gbr, Year) %>%
summarize(spending_avg=mean(Spending, na.rm=TRUE)) %>%
mutate(label = ifelse(Year != 2019, NA_character_, ifelse(is_gbr==1, "GBR", "EU"))) %>%
ggplot(aes(x=Year, y=spending_avg, color=as.factor(is_gbr))) +
geom_vline(
xintercept = seq(1999, 2019, 1), color = "grey80", size = .4) +
geom_segment(
data = tibble(y = seq(0, 11, 1), x1 = 1999, x2 = 2019),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey80",
size = .4
) +
geom_segment(
data = tibble(y = 0, x1 = 1999, x2 = 2019),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey80",
size = .4
) +
geom_point() +
geom_line() +
labs(title = "<b> UK spends relatively more on healthcare than the EU average</b><br>
<span style = 'font-size:12pt'>Health Expenditure as % of GDP in <span style='color:#235eb8'> EU countries </span> and <span style='color:#b9bb5e'> the UK </span></span>",
y = NULL, x = NULL,
caption = "Source: The World Bank (https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS)") +
theme_nhs() +
scale_x_continuous(breaks = seq(1999, 2019, 1), limits = c(1999, 2021), expand = c(0, 0)) +
scale_y_continuous(breaks = seq(0, 11, 1), labels = function(x) paste0(x, "%"), limits = c(0, 11)) +
theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
axis.ticks = element_line(color = "grey80",),
plot.title.position = "plot",
plot.title = ggtext::element_textbox_simple(size=16)) +
geom_text_repel(
aes(color = as.factor(is_gbr), label = label),
xlim = c(2019, NA),
family = "Helvetica",
fontface = "bold",
size = 4,
segment.size = .7,
segment.alpha = .5,
segment.linetype = "dotted"
) +
scale_color_manual(values = c(palette[1], palette[7]))
dataf <- data.frame(
Year=c("2007/8","2008/9","2009/10","2010/11","2011/12", "2012/13", "2013/14", "2014/15", "2015/16", "2016/17", "2017/18", "2018/19", "2019/20", "2020/21", "2021/22") ,
Funding=c(111.4, 116.6 ,124.1 , 124.5, 125.7, 126, 128.5, 131.2, 134.9, 135.7, 138.4, 141.4, 148.9, 191.01, 190.3))
p = ggplot(data= dataf, aes(x=Year, y=Funding, group = 1)) +
geom_line(linetype= "dashed", color="blue", size=0.5)+
geom_point(color="red", size=2) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
rect <- data.frame(xmin="2019/20", xmax= "2021/22", ymin=148.9, ymax=Inf)
p + geom_rect(data=rect, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax),
color="grey20",
alpha=0.1,
inherit.aes = FALSE) +
labs(
x = "Year",
y = "Funding (£bn)",
title = "COVID-19 caused sharp increases in NHS spending",
subtitle = "NHS Funding by year",
caption = "Source:The King's Fund analysis of HM Treasury data"
) +
annotate("text", x = "2020/21", y = 153, label = "italic(COVID-19)",size = 2.2,
parse = TRUE) +
theme_nhs() +
theme(
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey80")
)
datag = data.frame(x = c("Social Protection", "Health", "General Public Services", "Education"), y = c(299, 217, 109, 100))
p <- ggplot(datag, aes(x=x, y=y)) +
geom_segment(
aes(x=x, xend=x, y=0, yend=y),
color=ifelse(datag$x %in% c("Health"), "orange", "grey20"),
size=ifelse(datag$x %in% c("Health"), 1.3, 0.7)
) +
geom_point(
color=ifelse(datag$x %in% c("Health"), "orange", "grey20"),
size=ifelse(datag$x %in% c("Health"), 5, 2)) +
labs(
x = "Sector",
y = "Funding (£bn)",
title = "Health is the second highest expenditure",
subtitle = "Government Expenditure by Sector 2021/22",
caption= "Source:Public spending: a brief introduction-Philip Brien(07 October, 2022)"
) +
coord_flip() +
theme_nhs() +
theme(
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey80")
)
p
#https://digital.nhs.uk/data-and-information/publications/statistical/acute-patient-level-activity-and-costing/2019-20#resources
cost <- read.csv("data/healthcare-spending/Acute Patient Level Activity and Costing 2019-20 CSV data.csv") %>%
janitor::clean_names()
cost1 <- cost %>%
filter(summary_type=="Total Cost",
org_name=="ALL SUBMITTERS",
breakdown!="Total Cost",
breakdown_group_2!="Unknown")
kid <- c("0 years", "01-04 years", "05-09 years", "10-14 years")
young_adult <- c("15-19 years", "20-24 years", "25-29 years")
adult <- c("30-34 years", "35-39 years", "40-44 years", "45-49 years")
elderly <- c("50-54 years", "55-59 years", "60-64 years", "65-69 years")
old <- c("70-74 years", "75-79 years", "80-84 years", "85-89 years", "90-94 years", "95 years or older")
cost2 <- cost1 %>%
filter(breakdown_group_1 %in% c("Female", "Male")) %>%
summarize(age=ifelse(breakdown_group_2 %in% kid, "0-14", ifelse(breakdown_group_2 %in% young_adult, "15-29", ifelse(breakdown_group_2 %in% adult, "30-49", ifelse(breakdown_group_2 %in% elderly, "50-69", "70+")))),
breakdown_group_1,
value) %>%
group_by(age, breakdown_group_1) %>%
summarize(total_cost=sum(as.numeric(value)))
## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.
cost_age <- cost2 %>%
ungroup %>%
group_by(age)%>%
summarize(total_age=sum(as.numeric(total_cost)))
cost_gender <- cost2 %>%
ungroup %>%
group_by(breakdown_group_1)%>%
summarize(total_gender=sum(as.numeric(total_cost)))
source <- c(cost_age$age, rep("Cost", times = nrow(cost_gender)))
target <- c(rep("Cost", times = nrow(cost_age)), cost_gender$breakdown_group_1)
df_sanky <- data.frame(
source = source,
target = target,
value = c(cost_age$total_age, cost_gender$total_gender)
)
library(networkD3)
##
## Attaching package: 'networkD3'
## The following object is masked from 'package:leaflet':
##
## JS
nodes <- data.frame(
name=c(as.character(df_sanky$source),
as.character(df_sanky$target)) %>% unique()
)
df_sanky$IDsource <- match(df_sanky$source, nodes$name)-1
df_sanky$IDtarget <- match(df_sanky$target, nodes$name)-1
node_color <- 'd3.scaleOrdinal() .domain(["0-14", "15-29", "30-49", "50-69", "70+",
"Cost", "Female", "Male"]) .range(["#235eb8", "#235eb8", "#756f9f" , "#8a8191", "#9c9481", "#aba771", "#b9bb5e", "#c6cf45"])'
p1 <- sankeyNetwork(Links = df_sanky, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
colourScale = node_color,
sinksRight=FALSE)
p1
#how do costs compare?
cost3 <- cost %>%
filter(org_name=="ALL SUBMITTERS",
breakdown!="Total Cost",
breakdown=="Diagnosis") %>%
pivot_wider(names_from=summary_type,
values_from=value) %>%
janitor::clean_names() %>%
mutate(cost_per_visit = as.numeric(total_cost)/as.numeric(activity_count)) %>%
arrange(#desc(activity_count),
desc(cost_per_visit))
chosen_diseases <- c("Pregnancy, childbirth and the puerperium", "Cardiac conditions", "Poisoning (inc overdose)", "Diabetes and other endocrinological conditions", "Diseases of the respiratory system", "Bites/stings", "Infectious disease", "Diseases of the digestive system", "Mental and behavioural disorders")
cost3 %>%
filter(breakdown_group_1 %in% chosen_diseases) %>%
ggplot(aes(x=fct_reorder(breakdown_group_1, as.numeric(cost_per_visit)), y=cost_per_visit, fill=breakdown_group_1)) +
geom_bar(stat="identity") +
coord_flip() +
theme_nhs() +
scale_fill_manual(values=c(palette)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
labs(title = "Costs per visit vary significantly",
subtitle="Average cost per visit by category",
caption="Source: Digital NHS, Acute Patient Level Activity and Costing, 2019-20")
nhs_earnings<-read_csv("data/healthcare-spending/NHS Staff Annual Earnings Estimates to June 2022 in NHS Trusts and other core organisations in England, Provisional Statistics CSV text file.csv") %>%
clean_names()
nhs_earnings<-nhs_earnings%>%
mutate(year=format(date, format="%Y"))%>%
mutate(month=format(date, format="%m"))%>%
filter(!is.na(sample_size))%>%
mutate(gov_spending=amount*sample_size)%>%
mutate(year=format(date, format="%Y"))%>%
mutate(month=format(date, format="%m"))
nhs_earnings_sep<-nhs_earnings%>%
filter(month=="08")%>%
filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
filter(staff_group=="All staff")
countperyear<-nhs_earnings%>%
filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
filter(staff_group=="All staff")
nhs_earnings_sep<-nhs_earnings%>%
filter(month=="08")%>%
filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
filter(staff_group=="All staff")
g<-ggplot(countperyear,aes(x=date, y=sample_size/1000000))+geom_line()+
labs(x="Date",y="Number of Workers (million)",title="Steady increase in NHS workers since 2013",subtitle = "Number of emplyees over time", caption = "NHS Workforce Statistics - July 2022 (NHS, 2022)")+
geom_point(data=nhs_earnings_sep, aes(x=date, y=sample_size/1000000), colour="#F8766D") +
theme_nhs()
g
vacancy <- read.csv("data/vacancy/vacancy_region.csv") %>%
clean_names() %>%
rename(nhscr16nm = region) %>%
mutate(vacancy = as.numeric(str_remove(vacancy, "%")),
vacancy_number = as.numeric(str_remove_all(vacancy_number, ",")))
vacancy
## nhscr16nm vacancy vacancy_number
## 1 London 10.9 26002
## 2 South of England 7.3 22969
## 3 North of England 6.3 26079
## 4 Midlands and East of England 8.2 30805
# make sure you have the same direcory stucture to get London wards shapefile
uk_map <- read_sf("data/vacancy/NHS_shape/NHS_Commissioning_Regions_April_2016_Generalised_Clipped_Boundaries_in_England.shp")
uk_map_wgs84 <- uk_map %>%
st_transform(4326)
uk_map_wgs84_with_value <- uk_map_wgs84 %>%
mutate(nhscr16nm = ifelse(nhscr16nm == "North Of England", "North of England", nhscr16nm)) %>%
left_join(vacancy, by = "nhscr16nm")
vac_map <- uk_map_wgs84_with_value %>%
ggplot(aes()) +
geom_sf(aes(fill = vacancy, text = paste(nhscr16nm, "\nVacancy rate: ", vacancy, "%", sep = "")), colour = "#ffffff", size = 0.1) +
scale_fill_gradientn("%Vacancy", colors = c("#153A70", "#235eb8", "#296ED6", "#3082FC"),
labels = c("10%", "9%", "8%", "7%"),
breaks = c(0.1, 0.09, 0.08, 0.07)) +
theme_nhs() +
theme(
legend.position = "right"
) +
labs(title = "London NHS Vacancy was above 10% \n Source: NHS Digital, 2021/22 Q4 (Mar-22)")
ggplotly(vac_map, tooltip = "text")
physicians <- read_csv('data/vacancy/physicians_per_1000.csv', show_col_types = FALSE)
## New names:
## • `` -> `...67`
physicians_clean <- physicians %>%
select("Country Name", "2010", "2019") %>%
pivot_longer(cols = 2:3, names_to = "year", values_to = "physicians_per_1000") %>%
mutate(year = as.Date(paste(year, 1, 1, sep = "-")),
year = year(ymd(year))) %>%
clean_names()
p1 <- physicians_clean %>%
filter(year == 2019) %>%
drop_na() %>%
arrange(desc(physicians_per_1000)) %>%
top_n(10) %>%
mutate(color = as.factor(ifelse(country_name == "United Kingdom", 1, 0))) %>%
ggplot(aes(x = physicians_per_1000, y = fct_reorder(country_name, physicians_per_1000, max, desc = TRUE))) +
geom_col(aes(fill = color)) +
scale_fill_manual(values = two_color) +
theme_nhs() +
labs(y = NULL, x = 2019) +
theme(
panel.grid.major.x = element_line(color = "grey80"),
panel.grid.minor.x = element_line(color = "grey80")
)
p2 <- physicians_clean %>%
filter(year == 2010) %>%
drop_na() %>%
arrange(desc(physicians_per_1000)) %>%
top_n(10) %>%
mutate(color = as.factor(ifelse(country_name == "United Kingdom", 1, 0))) %>%
ggplot(aes(x = physicians_per_1000, y = fct_reorder(country_name, physicians_per_1000, max, desc = TRUE))) +
geom_col(aes(fill = color)) +
scale_fill_manual(values = two_color) +
theme_nhs() +
labs(y = NULL, x = 2010) +
theme(
panel.grid.major.x = element_line(color = "grey80"),
panel.grid.minor.x = element_line(color = "grey80")
)
(p2 + p1) +
plot_annotation(
title = "Physicians per 1k population increased from 2010 to 2019",
subtitle = "Year 2010 v.s. Year 2019",
caption = "NHS Vacancy Statistics England April 2015 – June 2022 Experimental Statistics"
) &
theme_nhs()
# NHS Staff Earnings Estimates (Source: https://digital.nhs.uk/data-and-information/publications/statistical/nhs-staff-earnings-estimates)
nhs_salary <- read_csv("data/healthcare-spending/NHS Staff Annual Earnings Estimates to June 2022 in NHS Trusts and other core organisations in England, Provisional Statistics CSV text file.csv") %>%
clean_names()
#Average weekly earnings of people in the UK Whole Economy Level (£): Seasonally Adjusted Total Pay Excluding Arrears (Source:https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/timeseries/kab9/emp)
uk_salary <- read_csv("data/healthcare-spending/UK_salary.csv", skip=7)%>%
clean_names() %>%
filter(important_notes %in% c("2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021")) %>%
mutate(yearly_salary_uk=x2*52) %>%
rename(year=important_notes) %>%
summarize(year, yearly_salary_uk)
nhs_salary2 <- nhs_salary %>%
filter(payment_type == "PUBGRP_020_EARNINGS",
staff_group == "All staff") %>%
mutate(year = year(date)) %>%
group_by(year) %>%
summarize(yearly_earnings=sum(amount)) %>%
filter(year != "2022") %>%
rename(yearly_earnings_nhs=yearly_earnings)
salary_data <- merge(uk_salary, nhs_salary2, by="year")
salary_data1 <- pivot_longer(salary_data,
cols=2:3,
names_to="type",
values_to="yearly_salary")
salary_data1 %>%
ggplot(aes(x=year, y=yearly_salary, color=type)) +
geom_point() +
theme_nhs() +
theme(legend.position = "none",
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey80"),
plot.title.position = "plot",
plot.title = ggtext::element_textbox_simple(size=16)) +
scale_color_manual(values = c(palette[1], palette[7])) +
labs(title = "<b> NHS salaries are bad... but maybe it's just a sign of a wider problem?</b><br>
<span style = 'font-size:12pt'> Average salary (£) over the years in the <span style='color:#235eb8'>NHS</span> compared to the <span style='color:#b9bb5e'>UK average </span></span>",
y = NULL, x = NULL,
caption = "Source: ONS Whole Economy Level (£): Seasonally Adjusted Total Pay, NHS Staff Earning Estimates")
# load data
wl <- read.csv("data/waiting_list/nhs_waiting_list_time_series.csv")
wl_ts <-wl %>%
pivot_longer(-month, names_to = "indicator", values_to = "value") %>%
separate(indicator, c("category", "index"), "_") %>%
mutate(category = str_replace_all(category, "\\.", "_"),
index = str_replace_all(str_replace_all(index, "\\.", "_"),
"__", "_")) %>%
mutate(index = if_else(str_detect(index, "_$"),
substr(index, 1, nchar(index) - 1), index),
category = if_else(str_detect(category, "_$"),
substr(category, 1, nchar(category) - 1),
category)) %>%
mutate(index = if_else(str_detect(index, "No__"),
str_replace(index, "No__", "Num_more_"),
str_replace(index, "No", "Num"))) %>%
mutate(index = if_else(str_detect(index, "^_"),
str_replace(
str_replace(index, "_within", "perc_within"),
"__", "perc_"),
index)) %>%
mutate(month = as.Date(paste("01-", substr(month, 4, 6),
"-", substr(month, 1, 2), sep=""), format = "%d-%b-%y")) %>%
mutate(category = as.factor(category),
index = as.factor(index))
# show data
head(wl_ts)
## # A tibble: 6 × 4
## month category index value
## <date> <fct> <fct> <dbl>
## 1 2010-01-01 Incomplete_RTT_pathways Median_wait_weeks 6.5
## 2 2010-01-01 Incomplete_RTT_pathways 92nd_percentile_weeks 20.3
## 3 2010-01-01 Incomplete_RTT_pathways Num_within_18_weeks 2075443
## 4 2010-01-01 Incomplete_RTT_pathways perc_within_18_weeks 0.9
## 5 2010-01-01 Incomplete_RTT_pathways Num_more_18_weeks 239617
## 6 2010-01-01 Incomplete_RTT_pathways Num_more_52_weeks 20737
skimr::skim(wl_ts)
| Name | wl_ts |
| Number of rows | 3060 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 2 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| month | 0 | 1 | 2010-01-01 | 2022-09-01 | 2016-05-01 | 153 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| category | 0 | 1 | FALSE | 3 | Inc: 1530, Adm: 765, Non: 765 |
| index | 0 | 1 | FALSE | 12 | Med: 459, Num: 459, Num: 459, 95t: 306 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| value | 270 | 0.91 | 331913.3 | 749873.8 | 0 | 7 | 182.5 | 276497.2 | 4257514 | ▇▁▁▁▁ |
Since the pandemic, the number of patients waiting for treatment has been increasing.
The waiting list’s size has accually doubled compared with that 6 years ago.
data <- wl_ts %>%
filter(category == "Incomplete_RTT_pathways" &
index %in% c("Num_within_18_weeks", "Num_more_18_weeks") &
month >= "2012-09-01") %>%
group_by(month) %>%
summarize(total_wl = sum(value))
p_wl_incomplete_rtt <- data %>%
ggplot(aes(x = month, y = total_wl)) +
theme_nhs() +
theme(panel.grid.major.y = element_line(color = "grey80"),
panel.grid.minor.y = element_line(color = "grey80"),
axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
axis.ticks = element_line(color = "grey80")) +
geom_bar(stat = "identity", fill = two_color[1]) +
scale_y_continuous(expand = c(0, 0),
labels = unit_format(unit = "million", scale = 1e-6),
breaks = seq(0, 7e6, 1e6)) +
scale_x_date(limits = as.Date(c("2012-09-01", "2022-09-01")),
expand = c(0, 0), date_labels = "%b-%y",
breaks = date_breaks(width = "6 month")) +
labs(x = NULL, y = NULL,
title = "NHS waiting list has been increasing since the pandemic",
subtitle = "Sep 2012 to Sep 2022",
caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
) +
geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") +
annotate("text", x = as.Date("2018-09-01"), y = 6.5*1e6,
label = "Start of the pandemic", hjust = 0.5, color = "black")
p_wl_incomplete_rtt
data <- wl_ts %>%
filter(category == "Incomplete_RTT_pathways" &
index %in% c("Num_more_18_weeks", "Num_more_52_weeks") &
month >= "2012-09-01") %>%
group_by(month, index) %>%
summarize(total_wl = sum(value)) %>%
mutate(label = ifelse(month != as.Date("2022-09-01"), NA_character_,
ifelse(index == "Num_more_18_weeks", "18+ w", "52+ w")))
p_wl_18_52 <- data %>%
ggplot(aes(x = month, y = total_wl, fill = as.factor(index))) +
theme_nhs() +
theme(axis.line.x = element_blank(),
axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
axis.ticks = element_line(color = "grey80")) +
geom_segment(
data = tibble(y = seq(0, 3200000, 400000),
x1 = as.Date("2012-09-01"),
x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey80",
size = .4
) +
geom_segment(
data = tibble(y = 0, x1 = as.Date("2012-09-01"), x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey40",
size = .4
) +
geom_area(stat = "identity", position = "identity", color = two_color[1], alpha = 0.5) +
scale_fill_manual(values = c("#7393E6", palette[1])) +
scale_y_continuous(expand = c(0, 0), breaks = seq(0, 3200000, 400000),
label = comma) +
labs(x = NULL, y = NULL,
title = "More atients waiting FOREVER since the pandemic",
subtitle = "Number of patients waiting over 18 and 52 weeks, Sep 2012 to Sep 2022",
caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
) +
geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") +
annotate("text", x = as.Date("2018-09-01"), y = 3000000,
label = "Start of the pandemic", hjust = 0.5, color = "black") +
scale_x_date(limits = c(as.Date("2012-09-01"), as.Date("2024-03-01")),
date_labels = "%y-%b", expand = c(0, 0),
breaks = c(as.Date("2012-09-01"), as.Date("2013-03-01"),
as.Date("2013-09-01"), as.Date("2014-03-01"),
as.Date("2014-09-01"), as.Date("2015-03-01"),
as.Date("2015-09-01"), as.Date("2016-03-01"),
as.Date("2016-09-01"), as.Date("2017-03-01"),
as.Date("2017-09-01"), as.Date("2018-03-01"),
as.Date("2018-09-01"), as.Date("2019-03-01"),
as.Date("2019-09-01"), as.Date("2020-03-01"),
as.Date("2020-09-01"), as.Date("2021-03-01"),
as.Date("2021-09-01"), as.Date("2022-03-01"), as.Date("2022-09-01")
))+
geom_text_repel(
aes(color = index, label = label),
xlim = c(as.Date("2022-09-01"), NA),
family = "Helvetica",
fontface = "bold",
size = 4,
segment.size = .7,
segment.alpha = .5,
segment.linetype = "dotted"
) +
scale_color_manual(values = c("#7393E6", palette[1]))
p_wl_18_52
data <- wl_ts %>%
filter((category == "Incomplete_RTT_pathways") &
(index %in% c("perc_within_18_weeks", "perc_52_weeks")) &
(month >= "2012-09-01")) %>%
group_by(month, index) %>%
summarize(total_wl = sum(value)) %>%
mutate(total_wl = ifelse(index == "perc_within_18_weeks", 1 - total_wl, total_wl),
index = ifelse(index == "perc_within_18_weeks", "perc_above_18_weeks", "perc_52_weeks")) %>%
mutate(label = ifelse(month != as.Date("2022-09-01"), NA_character_,
ifelse(index == "perc_above_18_weeks", "%18+ w", "%52+ w")))
p_wl_18_52_perc <- data %>%
ggplot(aes(x = month, y = total_wl, fill = as.factor(index))) +
theme_nhs() +
theme(axis.line.x = element_blank(),
axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
axis.ticks = element_line(color = "grey80")) +
geom_segment(
data = tibble(y = seq(0, 0.6, 0.05),
x1 = as.Date("2012-09-01"),
x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey80",
size = .4
) +
geom_segment(
data = tibble(y = 0, x1 = as.Date("2012-09-01"), x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey40",
size = .4
) +
geom_area(stat = "identity", position = "identity", color = two_color[1], alpha = 0.5) +
scale_fill_manual(values = c(palette[1], "#7393E6")) +
scale_y_continuous(labels = scales::percent,
expand = c(0, 0), breaks = seq(0, 0.6, 0.05)) +
labs(x = NULL, y = NULL,
title = "More atients waiting FOREVER since the pandemic",
subtitle = "Percentage of patients waiting over 18 and 52 weeks, Sep 2012 to Sep 2022",
caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
) +
geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") +
annotate("text", x = as.Date("2018-09-01"), y = 0.57,
label = "Start of the pandemic", hjust = 0.5, color = "black") +
scale_x_date(limits = c(as.Date("2012-09-01"), as.Date("2024-03-01")),
date_labels = "%y-%b", expand = c(0, 0),
breaks = c(as.Date("2012-09-01"), as.Date("2013-03-01"),
as.Date("2013-09-01"), as.Date("2014-03-01"),
as.Date("2014-09-01"), as.Date("2015-03-01"),
as.Date("2015-09-01"), as.Date("2016-03-01"),
as.Date("2016-09-01"), as.Date("2017-03-01"),
as.Date("2017-09-01"), as.Date("2018-03-01"),
as.Date("2018-09-01"), as.Date("2019-03-01"),
as.Date("2019-09-01"), as.Date("2020-03-01"),
as.Date("2020-09-01"), as.Date("2021-03-01"),
as.Date("2021-09-01"), as.Date("2022-03-01"), as.Date("2022-09-01")
))+
geom_text_repel(
aes(color = index, label = label),
xlim = c(as.Date("2022-09-01"), NA),
family = "Helvetica",
fontface = "bold",
size = 4,
segment.size = .7,
segment.alpha = .5,
segment.linetype = "dotted"
) +
scale_color_manual(values = c(palette[1], "#7393E6"))
p_wl_18_52_perc
bd <- read.csv("data/waiting_list/breakdown.csv")
names(bd) <- c("function_code", "function_name", "month", "total_wl", "total_within_18w", "perc_within_18w", "total_52plusw")
bd_clean <- bd %>%
mutate(
month = as.Date(paste("01-", substr(month, 4, 6),
"-", substr(month, 1, 2), sep=""), format = "%d-%b-%y")
)
total_22 <- (bd_clean %>% filter(month == as.Date("2022-09-01")) %>% select(function_code, total_wl) %>% rename("Sep-2022" = "total_wl"))
total_19 <- (bd_clean %>% filter(month == as.Date("2019-09-01")) %>% select(function_code, total_wl) %>% rename("Sep-2019" = "total_wl"))
total_2yr <- total_19 %>%
left_join(total_22, by = "function_code") %>%
left_join(
bd_clean %>%
filter(month == as.Date("2022-09-01")) %>%
select(function_code, function_name), by = "function_code"
) %>%
select(function_name, `Sep-2019`, `Sep-2022`)
total_2yr_4plot <- total_2yr %>%
rowwise() %>%
mutate( mean_tot = mean(c(`Sep-2019`,`Sep-2022`) )) %>%
arrange(mean_tot) %>%
mutate(function_name=factor(function_name, function_name))
# Plot
total_2yr_4plot %>% filter(function_name != "Other") %>%
ggplot() +
geom_segment( aes(x=function_name, xend=function_name,
y=`Sep-2019`, yend=`Sep-2022`), color="grey20") +
geom_point( aes(x=function_name, y=`Sep-2019`), color=two_color[1], size=3 ) +
geom_point( aes(x=function_name, y=`Sep-2022`), color=two_color[2], size=3 ) +
coord_flip()+
theme_nhs() +
theme(
legend.position = "none",
panel.grid.major = element_line(color = "grey80")
) +
scale_y_continuous(label = comma) +
labs(
title = "Waiting List Expansion by service",
subtitle = "Sep 2019 v.s. Sep 2022",
y = "Total Number of Incomplete Pathways", x = "")